We plot the data and can see that there is no obvious large difference between the debt levels or scenarios.
data.frame(
High_debt_version = d.both_completed %>%
filter(high_debt_version == "true") %>%
pull(quality_pre_task) %>%
revalue(c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
)),
Low_debt_version = d.both_completed %>%
filter(high_debt_version == "false") %>%
pull(quality_pre_task) %>%
revalue(c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
))
) %>%
likert() %>%
plot(
type="density",
facet = TRUE,
)
data.frame(
Tickets = d.both_completed %>%
filter(scenario == "tickets") %>%
pull(quality_pre_task) %>%
revalue(c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
)),
Booking = d.both_completed %>%
filter(scenario == "booking") %>%
pull(quality_pre_task) %>%
revalue(c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
))
) %>%
likert() %>%
plot(
type="density",
facet = TRUE,
)
As the data is collected from a likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.
We include high_debt_verison as a predictor in our model as this variable represent the very effect we want to measure. We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.
We iterate over the model until we have sane priors.
scenario_quality.with <- extendable_model(
base_name = "scenario_quality",
base_formula = "quality_pre_task ~ 1 + high_debt_version + (1 | session)",
base_priors = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = d.both_completed,
)
prior_summary(scenario_quality.with(only_priors= TRUE))
prior_summary(scenario_quality.with(sample_prior = "only"))
pp_check(scenario_quality.with(sample_prior = "only"), nsamples = 200, type = "bars")
We choose a beta parameter priors allowing for the beta parameter to account for 100% of the effect but that is skeptical to such strong effects from the beta parameter.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 0, 2)
sim.beta <- rnorm(sim.size, 0, 1)
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100
data.frame(x = sim.beta.diff) %>%
ggplot(aes(x)) +
geom_density() +
xlim(-150, 150) +
labs(
title = "Beta parameter prior influence",
x = "Estimate with beta as % of estimate without beta",
y = "Density"
)
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
pp_check(scenario_quality.with(), nsamples = 200, type = "bars")
summary(scenario_quality.with())
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.37 0.30 0.01 1.12 1.00 1955 1935
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.93 0.54 -3.08 -0.98 1.00 4023
## Intercept[2] -0.57 0.41 -1.41 0.21 1.00 5684
## Intercept[3] 0.22 0.41 -0.58 1.02 1.00 5406
## Intercept[4] 0.61 0.42 -0.17 1.42 1.00 5223
## Intercept[5] 1.91 0.49 1.02 2.91 1.00 4828
## Intercept[6] 3.97 0.72 2.65 5.51 1.00 5686
## high_debt_versionfalse 1.37 0.50 0.40 2.36 1.00 5604
## Tail_ESS
## Intercept[1] 2860
## Intercept[2] 3484
## Intercept[3] 3796
## Intercept[4] 3670
## Intercept[5] 3379
## Intercept[6] 3075
## high_debt_versionfalse 2993
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(scenario_quality.with(), ask = FALSE)
# default prior for monotonic predictor
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
We use loo to check some possible extensions on the model.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
# New model(s)
scenario_quality.with("work_domain"),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("work_experience_java.s"),
scenario_quality.with("education_field"),
scenario_quality.with("mo(education_level)", edlvl_prior),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("workplace_td_tracking"),
scenario_quality.with("workplace_pair_programming"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("scenario"),
scenario_quality.with("group")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## scenario_quality.with("work_experience_programming.s") 0.0 0.0
## scenario_quality.with("workplace_coding_standards") -0.2 1.7
## scenario_quality.with("workplace_peer_review") -0.5 1.7
## scenario_quality.with("work_experience_java.s") -0.5 0.5
## scenario_quality.with() -1.1 1.7
## scenario_quality.with("mo(education_level)", edlvl_prior) -1.2 1.6
## scenario_quality.with("workplace_td_tracking") -1.6 1.5
## scenario_quality.with("workplace_pair_programming") -1.7 1.7
## scenario_quality.with("scenario") -1.8 1.8
## scenario_quality.with("education_field") -2.1 1.8
## scenario_quality.with("group") -2.1 1.6
## scenario_quality.with("work_domain") -2.8 2.0
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.4 4.1
## p_loo 8.5 0.9
## looic 162.7 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1728
## (0.5, 0.7] (ok) 1 2.3% 1455
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -83.0 4.4
## p_loo 12.9 1.6
## looic 166.0 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 864
## (0.5, 0.7] (ok) 1 2.3% 1024
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 9.1 0.9
## looic 160.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1939
## (0.5, 0.7] (ok) 2 4.5% 1859
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.2
## p_loo 9.4 1.0
## looic 161.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1511
## (0.5, 0.7] (ok) 5 11.4% 1722
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.3 4.1
## p_loo 10.1 1.1
## looic 164.7 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.4 4.2
## p_loo 9.4 1.0
## looic 162.9 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1584
## (0.5, 0.7] (ok) 1 2.3% 3657
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1777
## (0.5, 0.7] (ok) 2 4.5% 4871
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.8 4.0
## p_loo 9.3 0.9
## looic 163.7 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.9 4.2
## p_loo 9.6 1.0
## looic 163.8 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.4 4.2
## p_loo 9.0 0.9
## looic 160.8 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1678
## (0.5, 0.7] (ok) 2 4.5% 1895
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.0 4.0
## p_loo 9.2 0.9
## looic 164.0 8.0
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1511
## (0.5, 0.7] (ok) 2 4.5% 3203
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.4 4.3
## p_loo 10.6 1.1
## looic 164.7 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("work_experience_java.s"),
# New model(s)
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")),
scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")),
scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")),
scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")),
scenario_quality.with(c("workplace_peer_review", "work_experience_java.s"))
)
loo_result[2]
## $diffs
## elpd_diff
## scenario_quality.with("work_experience_programming.s") 0.0
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) -0.1
## scenario_quality.with("workplace_coding_standards") -0.2
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) -0.4
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")) -0.4
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) -0.5
## scenario_quality.with("workplace_peer_review") -0.5
## scenario_quality.with("work_experience_java.s") -0.5
## scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")) -0.7
## scenario_quality.with(c("workplace_peer_review", "work_experience_java.s")) -0.7
## scenario_quality.with() -1.1
## se_diff
## scenario_quality.with("work_experience_programming.s") 0.0
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 0.9
## scenario_quality.with("workplace_coding_standards") 1.7
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) 0.7
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")) 0.3
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) 1.0
## scenario_quality.with("workplace_peer_review") 1.7
## scenario_quality.with("work_experience_java.s") 0.5
## scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")) 1.7
## scenario_quality.with(c("workplace_peer_review", "work_experience_java.s")) 0.9
## scenario_quality.with() 1.7
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.4 4.1
## p_loo 8.5 0.9
## looic 162.7 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1728
## (0.5, 0.7] (ok) 1 2.3% 1455
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 9.1 0.9
## looic 160.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1939
## (0.5, 0.7] (ok) 2 4.5% 1859
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.4 4.2
## p_loo 9.0 0.9
## looic 160.8 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1678
## (0.5, 0.7] (ok) 2 4.5% 1895
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1777
## (0.5, 0.7] (ok) 2 4.5% 4871
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.2
## p_loo 9.4 1.0
## looic 161.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1511
## (0.5, 0.7] (ok) 5 11.4% 1722
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.3
## p_loo 9.7 0.9
## looic 160.6 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1818
## (0.5, 0.7] (ok) 3 6.8% 1983
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.6 4.4
## p_loo 9.9 1.0
## looic 161.2 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.6 4.2
## p_loo 9.5 1.0
## looic 161.3 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1158
## (0.5, 0.7] (ok) 2 4.5% 1648
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.3
## p_loo 9.7 1.1
## looic 161.8 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1632
## (0.5, 0.7] (ok) 1 2.3% 1812
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 9.9 1.0
## looic 161.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1787
## (0.5, 0.7] (ok) 1 2.3% 1657
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_peer_review", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.4
## p_loo 9.9 1.0
## looic 161.9 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1799
## (0.5, 0.7] (ok) 6 13.6% 1221
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("work_experience_java.s"),
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")),
scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")),
# New model(s)
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")),
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")),
scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review"))
)
loo_result[2]
## $diffs
## elpd_diff
## scenario_quality.with("work_experience_programming.s") 0.0
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) -0.1
## scenario_quality.with("workplace_coding_standards") -0.2
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) -0.4
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) -0.5
## scenario_quality.with("workplace_peer_review") -0.5
## scenario_quality.with("work_experience_java.s") -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")) -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")) -0.8
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review")) -0.9
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")) -1.0
## scenario_quality.with() -1.1
## se_diff
## scenario_quality.with("work_experience_programming.s") 0.0
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 0.9
## scenario_quality.with("workplace_coding_standards") 1.7
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) 0.7
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) 1.0
## scenario_quality.with("workplace_peer_review") 1.7
## scenario_quality.with("work_experience_java.s") 0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")) 0.9
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")) 1.0
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review")) 1.0
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")) 0.7
## scenario_quality.with() 1.7
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.4 4.1
## p_loo 8.5 0.9
## looic 162.7 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1728
## (0.5, 0.7] (ok) 1 2.3% 1455
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 9.1 0.9
## looic 160.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1939
## (0.5, 0.7] (ok) 2 4.5% 1859
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.4 4.2
## p_loo 9.0 0.9
## looic 160.8 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1678
## (0.5, 0.7] (ok) 2 4.5% 1895
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1777
## (0.5, 0.7] (ok) 2 4.5% 4871
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.2
## p_loo 9.4 1.0
## looic 161.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1511
## (0.5, 0.7] (ok) 5 11.4% 1722
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.3
## p_loo 9.7 0.9
## looic 160.6 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1818
## (0.5, 0.7] (ok) 3 6.8% 1983
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.6 4.4
## p_loo 9.9 1.0
## looic 161.2 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 9.9 1.0
## looic 161.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1787
## (0.5, 0.7] (ok) 1 2.3% 1657
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.0 4.4
## p_loo 10.6 1.1
## looic 162.0 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 10.1 1.0
## looic 161.5 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 944
## (0.5, 0.7] (ok) 2 4.5% 2540
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.2 4.4
## p_loo 10.4 1.0
## looic 162.4 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1057
## (0.5, 0.7] (ok) 1 2.3% 1522
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.1 4.4
## p_loo 10.5 1.1
## looic 162.3 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1606
## (0.5, 0.7] (ok) 1 2.3% 1766
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
scenario_quality0 <- brm(
"quality_pre_task ~ 1 + high_debt_version + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality0",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality0)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.37 0.30 0.01 1.12 1.00 1955 1935
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.93 0.54 -3.08 -0.98 1.00 4023
## Intercept[2] -0.57 0.41 -1.41 0.21 1.00 5684
## Intercept[3] 0.22 0.41 -0.58 1.02 1.00 5406
## Intercept[4] 0.61 0.42 -0.17 1.42 1.00 5223
## Intercept[5] 1.91 0.49 1.02 2.91 1.00 4828
## Intercept[6] 3.97 0.72 2.65 5.51 1.00 5686
## high_debt_versionfalse 1.37 0.50 0.40 2.36 1.00 5604
## Tail_ESS
## Intercept[1] 2860
## Intercept[2] 3484
## Intercept[3] 3796
## Intercept[4] 3670
## Intercept[5] 3379
## Intercept[6] 3075
## high_debt_versionfalse 2993
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.145808643 0.4248984 -1.1949402 0.5746195
## 6033d90a5af2c702367b3a96 0.010889353 0.4027784 -0.8648566 0.9186598
## 6034fc165af2c702367b3a98 -0.014139016 0.3898703 -0.8812689 0.8228452
## 603500725af2c702367b3a99 0.278401903 0.5592082 -0.4480578 1.8078078
## 603f97625af2c702367b3a9d -0.025614765 0.4025079 -0.9741733 0.8118892
## 603fd5d95af2c702367b3a9e 0.029689046 0.4237631 -0.8545135 0.9521520
## 60409b7b5af2c702367b3a9f -0.041044832 0.3932839 -0.9830847 0.7521488
## 604b82b5a7718fbed181b336 0.229636160 0.5094315 -0.5089644 1.5880900
## 6050c1bf856f36729d2e5218 -0.106199391 0.4382125 -1.2141728 0.6825527
## 6050e1e7856f36729d2e5219 0.009089846 0.4345471 -0.9334836 0.9755679
## 6055fdc6856f36729d2e521b 0.008980702 0.3847572 -0.8205482 0.8926897
## 60589862856f36729d2e521f -0.150708085 0.4433861 -1.2909802 0.5624619
## 605afa3a856f36729d2e5222 -0.015244970 0.3923156 -0.8588107 0.8227719
## 605c8bc6856f36729d2e5223 -0.152145038 0.4485480 -1.3162503 0.5817486
## 605f3f2d856f36729d2e5224 -0.154915352 0.4605884 -1.3723010 0.5803257
## 605f46c3856f36729d2e5225 0.067034998 0.4079185 -0.7292448 1.0766500
## 60605337856f36729d2e5226 -0.083013356 0.4024577 -1.0944837 0.7104604
## 60609ae6856f36729d2e5228 0.003131549 0.4121036 -0.9170141 0.9062533
## 6061ce91856f36729d2e522e 0.015189729 0.4217685 -0.8343022 0.9733779
## 6061f106856f36729d2e5231 0.241676436 0.4979721 -0.4188072 1.6213750
## 6068ea9f856f36729d2e523e -0.059896999 0.4270294 -1.0585490 0.7845769
## 6075ab05856f36729d2e5247 0.070761429 0.4008793 -0.7451969 1.0260368
plot(scenario_quality0, ask = FALSE)
pp_check(scenario_quality0, nsamples = 200, type = "bars")
We select the best performing model with one variable.
scenario_quality1 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality1",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality1)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.34 0.28 0.01 1.02 1.00 1996 1992
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.96 0.54 -3.07 -0.97 1.00
## Intercept[2] -0.54 0.42 -1.37 0.25 1.00
## Intercept[3] 0.25 0.41 -0.58 1.05 1.00
## Intercept[4] 0.65 0.43 -0.20 1.48 1.00
## Intercept[5] 1.98 0.48 1.07 2.94 1.00
## Intercept[6] 4.09 0.72 2.79 5.58 1.00
## high_debt_versionfalse 1.46 0.50 0.49 2.46 1.00
## work_experience_programming.s -0.54 0.30 -1.14 0.02 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 4001 3187
## Intercept[2] 5984 3454
## Intercept[3] 5672 3254
## Intercept[4] 5190 3618
## Intercept[5] 4921 3641
## Intercept[6] 5569 3280
## high_debt_versionfalse 6900 2831
## work_experience_programming.s 5667 3206
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.159433967 0.4222374 -1.2195278 0.5324355
## 6033d90a5af2c702367b3a96 -0.015543701 0.3761998 -0.8638936 0.7832535
## 6034fc165af2c702367b3a98 -0.042729862 0.3738382 -0.9355357 0.7190091
## 603500725af2c702367b3a99 0.234581683 0.5184966 -0.4245614 1.7083458
## 603f97625af2c702367b3a9d -0.062785160 0.3935959 -1.0109665 0.6876793
## 603fd5d95af2c702367b3a9e 0.028150330 0.4083989 -0.8415633 1.0003215
## 60409b7b5af2c702367b3a9f -0.066476703 0.3818330 -1.0111193 0.6554037
## 604b82b5a7718fbed181b336 0.202113370 0.4712225 -0.4841701 1.4651678
## 6050c1bf856f36729d2e5218 -0.091786068 0.4058363 -1.0932963 0.6303960
## 6050e1e7856f36729d2e5219 0.006725887 0.4233766 -0.9116780 0.9146043
## 6055fdc6856f36729d2e521b -0.014445895 0.3954740 -0.9209347 0.8258082
## 60589862856f36729d2e521f -0.036633405 0.4128405 -1.0130460 0.7965760
## 605afa3a856f36729d2e5222 0.071220522 0.3932498 -0.6663469 1.0517088
## 605c8bc6856f36729d2e5223 -0.131112297 0.4220846 -1.2187647 0.5509834
## 605f3f2d856f36729d2e5224 -0.001081396 0.4104092 -0.9145712 0.8871584
## 605f46c3856f36729d2e5225 0.035923735 0.3825851 -0.7322183 0.8832729
## 60605337856f36729d2e5226 -0.103826004 0.3974821 -1.0814420 0.6448245
## 60609ae6856f36729d2e5228 -0.020517043 0.3960141 -0.8910282 0.8307672
## 6061ce91856f36729d2e522e -0.011281013 0.3956216 -0.9082552 0.8364204
## 6061f106856f36729d2e5231 0.182777387 0.4538511 -0.4902734 1.3889143
## 6068ea9f856f36729d2e523e -0.054519571 0.3972178 -1.0131963 0.7438614
## 6075ab05856f36729d2e5247 0.043030243 0.3823530 -0.7348279 0.9251660
plot(scenario_quality1, ask = FALSE)
pp_check(scenario_quality1, nsamples = 200, type = "bars")
We select the best performing model with two variables.
scenario_quality2 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality2",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality2)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.34 0.28 0.01 1.05 1.00 2092 1852
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.74 0.59 -2.96 -0.66 1.00
## Intercept[2] -0.31 0.47 -1.23 0.61 1.00
## Intercept[3] 0.49 0.48 -0.45 1.44 1.00
## Intercept[4] 0.90 0.49 -0.07 1.85 1.00
## Intercept[5] 2.25 0.54 1.20 3.28 1.00
## Intercept[6] 4.39 0.78 2.95 6.00 1.00
## high_debt_versionfalse 1.46 0.50 0.47 2.44 1.00
## work_experience_programming.s -0.44 0.32 -1.08 0.19 1.00
## workplace_coding_standardsfalse 0.55 0.53 -0.48 1.58 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5001 3217
## Intercept[2] 6046 3513
## Intercept[3] 5123 3417
## Intercept[4] 4818 3232
## Intercept[5] 4548 3319
## Intercept[6] 5731 3224
## high_debt_versionfalse 5943 2983
## work_experience_programming.s 5691 3066
## workplace_coding_standardsfalse 5608 3186
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality2)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.1850866386 0.4384291 -1.3340180 0.4855577
## 6033d90a5af2c702367b3a96 0.0075140476 0.3863993 -0.8321943 0.8579651
## 6034fc165af2c702367b3a98 -0.0701978961 0.3758943 -0.9578170 0.6824377
## 603500725af2c702367b3a99 0.2130600807 0.4954732 -0.4602421 1.5466533
## 603f97625af2c702367b3a9d -0.0281558797 0.3732357 -0.9172471 0.7497192
## 603fd5d95af2c702367b3a9e 0.0085768526 0.4000502 -0.8736815 0.8766696
## 60409b7b5af2c702367b3a9f -0.0909559256 0.3848050 -1.0619832 0.5911907
## 604b82b5a7718fbed181b336 0.1684272787 0.4480970 -0.5132888 1.3873255
## 6050c1bf856f36729d2e5218 -0.0575260636 0.3670973 -0.9456209 0.6788483
## 6050e1e7856f36729d2e5219 0.0007994078 0.4386599 -0.9292900 0.9777997
## 6055fdc6856f36729d2e521b 0.0170605966 0.3979439 -0.8282522 0.9324863
## 60589862856f36729d2e521f -0.0363886288 0.3907343 -0.9545415 0.7499846
## 605afa3a856f36729d2e5222 0.0960886516 0.4021753 -0.6031806 1.1364945
## 605c8bc6856f36729d2e5223 -0.0962088139 0.4246298 -1.1668190 0.6899267
## 605f3f2d856f36729d2e5224 -0.0123978569 0.4147140 -0.9445681 0.8827722
## 605f46c3856f36729d2e5225 0.0696832011 0.3850693 -0.6216591 1.0197998
## 60605337856f36729d2e5226 -0.0641767609 0.3929412 -1.0205835 0.6657965
## 60609ae6856f36729d2e5228 -0.0507428335 0.4008238 -1.0412842 0.7429305
## 6061ce91856f36729d2e522e 0.0085419736 0.3948507 -0.8539918 0.8687434
## 6061f106856f36729d2e5231 0.1642983208 0.4297256 -0.4812203 1.2917675
## 6068ea9f856f36729d2e523e -0.0759442330 0.4117008 -1.0736785 0.6978153
## 6075ab05856f36729d2e5247 0.0160915146 0.3727621 -0.8039486 0.8304968
plot(scenario_quality2, ask = FALSE)
pp_check(scenario_quality2, nsamples = 200, type = "bars")
We select the best performing model with three variables.
scenario_quality3 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + work_experience_java.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality3",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality3)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + work_experience_java.s + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.36 0.29 0.01 1.08 1.00 2460 1877
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.75 0.59 -2.98 -0.66 1.00
## Intercept[2] -0.31 0.47 -1.25 0.59 1.00
## Intercept[3] 0.50 0.48 -0.46 1.45 1.00
## Intercept[4] 0.91 0.49 -0.08 1.84 1.00
## Intercept[5] 2.25 0.55 1.20 3.35 1.00
## Intercept[6] 4.38 0.80 2.87 6.04 1.00
## high_debt_versionfalse 1.46 0.51 0.44 2.46 1.00
## work_experience_programming.s -0.37 0.63 -1.62 0.86 1.00
## workplace_coding_standardsfalse 0.56 0.55 -0.51 1.63 1.00
## work_experience_java.s -0.09 0.62 -1.28 1.17 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5124 3130
## Intercept[2] 5381 3355
## Intercept[3] 5023 3567
## Intercept[4] 4935 3339
## Intercept[5] 4403 3482
## Intercept[6] 6132 3348
## high_debt_versionfalse 6472 2923
## work_experience_programming.s 3914 3160
## workplace_coding_standardsfalse 5347 2768
## work_experience_java.s 3896 2977
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality3)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.1903833044 0.4506084 -1.4153100 0.4852661
## 6033d90a5af2c702367b3a96 0.0093025699 0.4186244 -0.8724130 0.9454708
## 6034fc165af2c702367b3a98 -0.0691445991 0.3824515 -0.9737297 0.6678971
## 603500725af2c702367b3a99 0.2311277379 0.5133153 -0.4584094 1.6174950
## 603f97625af2c702367b3a9d -0.0207399348 0.3879605 -0.9110777 0.8062503
## 603fd5d95af2c702367b3a9e 0.0049382755 0.4184704 -0.9122366 0.9282139
## 60409b7b5af2c702367b3a9f -0.1055862073 0.4076941 -1.1604295 0.6124789
## 604b82b5a7718fbed181b336 0.1924915130 0.4605045 -0.4765054 1.4408263
## 6050c1bf856f36729d2e5218 -0.0575276158 0.4013602 -1.0213810 0.7734848
## 6050e1e7856f36729d2e5219 0.0002545732 0.4474630 -0.9747042 1.0211738
## 6055fdc6856f36729d2e521b 0.0053660188 0.4110821 -0.8788615 0.9031909
## 60589862856f36729d2e521f -0.0367950813 0.4172521 -1.0326400 0.8359434
## 605afa3a856f36729d2e5222 0.1033646340 0.4184323 -0.6885361 1.1228533
## 605c8bc6856f36729d2e5223 -0.1065917832 0.4213731 -1.1817585 0.6337637
## 605f3f2d856f36729d2e5224 -0.0016435313 0.4319396 -0.9221958 0.9319698
## 605f46c3856f36729d2e5225 0.0716357076 0.4086795 -0.7389420 1.0378890
## 60605337856f36729d2e5226 -0.0671044044 0.4020815 -1.0200615 0.7132768
## 60609ae6856f36729d2e5228 -0.0433234757 0.4057871 -0.9945799 0.8118748
## 6061ce91856f36729d2e522e 0.0156192045 0.4105734 -0.8765265 0.9472047
## 6061f106856f36729d2e5231 0.1856322445 0.4646586 -0.4845219 1.4078920
## 6068ea9f856f36729d2e523e -0.0884572608 0.4335679 -1.1844307 0.7224257
## 6075ab05856f36729d2e5247 0.0221584312 0.3876377 -0.7987899 0.8770786
plot(scenario_quality3, ask = FALSE)
pp_check(scenario_quality3, nsamples = 200, type = "bars")
All candidate models look nice, none is significantly better than the others, we will proceed the model containing work experince as it otherwise ourd be added in the next step: scenario_quality1
Some participants did only complete one scenario. Those has been excluded from the initial dataset to improve sampling of the models. We do however want to use all data we can and will therefore try to fit the model with the complete dataset.
scenario_quality1.all <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.completed),
file = "fits/scenario_quality1.all",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality1.all)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.32 0.27 0.01 1.00 1.00 2241 2352
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -2.08 0.54 -3.21 -1.07 1.00
## Intercept[2] -0.59 0.40 -1.41 0.17 1.00
## Intercept[3] 0.30 0.40 -0.50 1.07 1.00
## Intercept[4] 0.81 0.41 0.01 1.62 1.00
## Intercept[5] 2.07 0.47 1.16 3.04 1.00
## Intercept[6] 4.21 0.74 2.92 5.77 1.00
## high_debt_versionfalse 1.41 0.47 0.48 2.33 1.00
## work_experience_programming.s -0.52 0.27 -1.07 -0.01 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 4447 3193
## Intercept[2] 5947 3160
## Intercept[3] 5162 3582
## Intercept[4] 4933 3684
## Intercept[5] 4327 3378
## Intercept[6] 5201 3159
## high_debt_versionfalse 6359 3180
## work_experience_programming.s 5536 2843
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality1.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -0.030973291 0.3854158 -0.8996900 0.7694326
## 6033d69a5af2c702367b3a95 -0.136561496 0.3929666 -1.1464538 0.5008669
## 6033d90a5af2c702367b3a96 -0.018769543 0.3597007 -0.8408294 0.7565932
## 6034fc165af2c702367b3a98 -0.021765127 0.3477023 -0.8204487 0.7088100
## 603500725af2c702367b3a99 0.230712314 0.5017428 -0.4206965 1.6268883
## 603f84f15af2c702367b3a9b -0.009023566 0.3644119 -0.8182057 0.7776929
## 603f97625af2c702367b3a9d -0.042744744 0.3648838 -0.8926435 0.7129600
## 603fd5d95af2c702367b3a9e 0.012378490 0.3873651 -0.7956125 0.8910164
## 60409b7b5af2c702367b3a9f -0.047873935 0.3696613 -0.8868011 0.6597484
## 604b82b5a7718fbed181b336 0.185524519 0.4472555 -0.4606588 1.4185400
## 604f1239a7718fbed181b33f 0.022022527 0.3805942 -0.7979574 0.8850668
## 6050c1bf856f36729d2e5218 -0.078239995 0.3889774 -1.0521142 0.6543908
## 6050e1e7856f36729d2e5219 0.010330022 0.3942243 -0.8685386 0.8539365
## 6055fdc6856f36729d2e521b -0.013429287 0.3650010 -0.8338232 0.7646903
## 60579f2a856f36729d2e521e -0.132884272 0.4360632 -1.2761810 0.5667872
## 60589862856f36729d2e521f -0.025912696 0.3814473 -0.8940373 0.7428015
## 605a30a7856f36729d2e5221 0.089497327 0.4239680 -0.6657091 1.1517157
## 605afa3a856f36729d2e5222 0.090607496 0.3896112 -0.6174761 1.0546930
## 605c8bc6856f36729d2e5223 -0.104949142 0.4019869 -1.1328828 0.5965566
## 605f3f2d856f36729d2e5224 0.011176424 0.3847619 -0.8197675 0.9197957
## 605f46c3856f36729d2e5225 0.049884875 0.3684020 -0.6932974 0.9289825
## 60605337856f36729d2e5226 -0.076351740 0.3605451 -0.9599046 0.5831593
## 60609ae6856f36729d2e5228 -0.014562207 0.3768967 -0.8827419 0.7763377
## 6061ce91856f36729d2e522e -0.004064769 0.3649999 -0.7983834 0.7916814
## 6061f106856f36729d2e5231 0.176318266 0.4283463 -0.4299918 1.3526873
## 60672faa856f36729d2e523c -0.074094794 0.3890243 -1.0218945 0.7297932
## 6068ea9f856f36729d2e523e -0.052598797 0.3797743 -0.9990352 0.6790156
## 606db69d856f36729d2e5243 -0.052389636 0.3796989 -0.9876359 0.6870333
## 6075ab05856f36729d2e5247 0.054885087 0.3656994 -0.6145519 0.9511147
plot(scenario_quality1.all, ask = FALSE)
pp_check(scenario_quality1.all, nsamples = 200, type = "bars")
This means that our final model, with all data points and experience predictors, is scenario_quality1.all
To begin interpreting the model we look at how it’s parameters were estimated. As our research is focused on how the outcome of the model is effected we will mainly analyze the \(\beta\) parameters.
mcmc_areas(scenario_quality1.all, pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"), prob = 0.95) + scale_y_discrete() +
scale_y_discrete(labels=c("High debt version: false", "Professional programming experience")) +
ggtitle("Beta parameters densities in scenario quality model", subtitle = "Shaded region marks 95% of the density. Line marks the median")
scale_programming_experience <- function(x) {
(x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}
post_settings <- expand.grid(
high_debt_version = c("false", "true"),
session = NA,
work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)
post <- posterior_predict(scenario_quality1.all, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
select(
estimate,
high_debt_version,
work_experience_programming
)%>%
mutate(estimate = estimate)
post.nice <- post %>% mutate_at("estimate", function(x) revalue(as.ordered(x), c(
"1"="Very Bad",
"2"="Bad",
"3"="Somewhat Bad",
"4"="Neutral",
"5"="Somewhat Good",
"6"="Good",
"7"="Very Good"
)))
data.frame(
High_debt_version_3_years = post.nice %>%
filter(high_debt_version == "true", work_experience_programming == 3) %>%
pull(estimate),
Low_debt_version_3_years = post.nice %>%
filter(high_debt_version == "false", work_experience_programming == 3) %>%
pull(estimate)
) %>%
likert() %>%
plot(
type="density",
facet = TRUE,
)
data.frame(
High_debt_version_25_years = post.nice %>%
filter(high_debt_version == "true", work_experience_programming == 25) %>%
pull(estimate),
Low_debt_version_25_years = post.nice %>%
filter(high_debt_version == "false", work_experience_programming == 25) %>%
pull(estimate)
) %>%
likert() %>%
plot(
type="density",
facet = TRUE,
)
post.diff <- post %>% filter(high_debt_version == "true")
post.diff$estimate = post.diff$estimate - filter(post, high_debt_version == "false")$estimate
post.diff %>%
ggplot(aes(x=estimate)) +
geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
facet_grid(rows = vars(work_experience_programming)) +
labs(
title = "Scenario rating diff / years of programming experience",
subtitle = "Difference as: high debt rating - low debt rating",
x = "Rating difference"
) +
scale_y_continuous(breaks = NULL)
We can then proceed to calculate some likelihoods:
post.diff.10 <- post.diff %>% filter(work_experience_programming == 10)
high_debt_rated_higher <- sum(post.diff.10$estimate > 0)
low_debt_rated_higher <- sum(post.diff.10$estimate < 0)
x <- low_debt_rated_higher / high_debt_rated_higher
x
## [1] 2.804348
Participants with 10 years of professional programming experience were 180% more likely to rate the high debt version scenario as worse than then they were to rate the low debt version scenario as worse.